from PIL import Image, ImageDraw
import json
import os
import re
import pdb
import random
import argparse
import numpy as np
from collections import Counter
from IPython.display import display
from tqdm import tqdm
random.seed(42)  # Fix the random seed


def get_answer(step):
    """Build the target answer for one annotated step.

    Args:
        step: Mapping with at least ``action_type_id`` and
            ``action_type_text``. ``touch`` and ``lift`` are read only for a
            click (id 4 with text ``'click'``); ``type_text`` is read only
            for id 3. An optional ``think`` entry switches the return format.

    Returns:
        Without ``think``: a dict with keys ``action`` (upper-cased action
        text), ``value`` (typed text or ``None``) and ``position`` (click
        point or ``None``), in that order.
        With ``think``: a string made of the think text, a newline, and the
        dict rendered inside ``<answer>...</answer>``.
    """
    kind_id = step['action_type_id']
    kind_text = step['action_type_text']

    position = None
    value = None
    if kind_id == 3:
        value = step['type_text']
    elif kind_id == 4 and kind_text == 'click':
        down = step['touch']
        up = step['lift']
        # Midpoint between touch and lift, kept to two decimals.
        position = [round((down[axis] + up[axis]) / 2, 2) for axis in (0, 1)]

    answer = {
        'action': kind_text.upper(),
        'value': value,
        'position': position,
    }
    if 'think' not in step:
        return answer
    return "{}\n<answer>{}</answer>".format(step["think"], answer)


# Instruction prompt describing the required reasoning steps, the action
# space and the output format. In this file it is stored verbatim in the
# "problem" field of every generated record.
# NOTE(review): the text is never passed through str.format here, so the
# doubled braces in the <answer> example are emitted literally as "{{" and
# "}}" -- presumably a downstream consumer formats the prompt; confirm.
# NOTE(review): "ext action" (likely "next action") and the "Decesion"
# spelling are part of the emitted prompt/tag text and are left untouched;
# confirm nothing depends on them before correcting.
_WEB_SINGLE_SYSTEM = """You are an assistant trained to navigate the web. 
Given a task instruction, a screenshot, and a last history action summary, output the think and ext action and wait for the next observation. 
The think must strictly follow these reasoning steps:
(1) Progress Estimation: Interface Comprehension and Progress Estimation
(2) Decesion Reasoning: Strategy Formulation
(3) History Summary: Update the history action summary according to the last history action summary and the action you executed.

## Action Space
1. `CLICK`: Click on an element, value is not applicable and the position [x,y] is required. 
2. `TYPE`: Type a string into an element, value is a string to type and the position is not applicable.
3. `SELECT`: Select a value for an element, value is the value to select and the position is not applicable.
4. `SCROLL UP`: Scroll up for the screen.
5. `SCROLL DOWN`: Scroll down for the screen.
6. `SCROLL LEFT`: Scroll left for the screen.
7. `SCROLL RIGHT`: Scroll right for the screen.
8. `PRESS BACK`: Press for returning to the previous step, value and position are not applicable.
9. `PRESS HOME`: Press for returning to the home screen, value and position are not applicable.
10. `PRESS ENTER`: Press for submitting the input content, value and position are not applicable.
11. `STATUS TASK COMPLETE`: Indicate the task is completed, value and position are not applicable.
12. `STATUS TASK IMPOSSIBLE `: Indicate the task is impossible to complete, value and position are not applicable.

## Output Format
<Progress Estimation>
...
</Progress Estimation>
<Decesion Reasoning>
...
</Decesion Reasoning>
<answer>
{{'action': 'ACTION_TYPE', 'value': 'element', 'position': [x,y]}}
</answer>
<History Summary>
...
</History Summary>

If value or position is not applicable, set it as `None`.
Position represents the relative coordinates on the screenshot and should be scaled to a range of 0-1.
"""



# Convert the AITW training annotations into a flat list with one record per
# step, then write that list out as JSON Lines.

# Imported before any work starts so a missing package fails immediately
# instead of after the whole conversion loop has already run.
import jsonlines

parent_dir = "Dataset path"
imgs_dir = f"{parent_dir}/AITW/images"
# The context manager closes the annotation file as soon as it is parsed.
with open(f"{parent_dir}/AITW/aitw_data_train.json", 'r', encoding="utf-8") as data_file:
    aitw_data = json.load(data_file)


total_step = []
step_i = 0  # running index over every emitted step, across all episodes
for scenario, aitw_subset in aitw_data.items():
    # One progress bar per scenario, labelled with the scenario key.
    for sample in tqdm(aitw_subset, desc=scenario):
        if not sample:
            # An empty episode has no goal to read and no steps to emit.
            continue
        confirmed_task = sample[0]['goal']

        # String form of each answer already emitted for this episode.
        previous_actions = []
        for i, step in enumerate(sample):
            filename = step['img_filename']
            img_url = os.path.join(imgs_dir, filename) + '.png'
            if not os.path.exists(img_url):
                # Report the missing screenshot and drop the step.
                # NOTE: a dropped step is absent from later history strings,
                # and if it is the final step of the episode no record of
                # that episode is flagged "is_last".
                print(img_url)
                continue
            # The image object itself is never used; it is opened only so an
            # unreadable file still aborts the run, and closed right away so
            # no file handle is kept.
            with Image.open(img_url):
                pass

            answer_dict = get_answer({
                "action_type_id": step["action_type_id"],
                "action_type_text": step["action_type_text"],
                "annot_position": step['annot_position'],
                "touch": step['touch'],
                "lift": step['lift'],
                "type_text": step['type_text'],
            })
            cur_answer = str(answer_dict)

            # Every stored answer is the str() of a dict and therefore ends
            # with "}", so it can be embedded as-is followed by ". ".
            previous_step = "".join(
                f"Step{j}, previous action: {action}. "
                for j, action in enumerate(previous_actions)
            )

            total_step.append({
                "id": f"aitw_{step_i}",
                "step_id": step_i,
                "image": img_url,
                "problem": _WEB_SINGLE_SYSTEM,
                "solution": cur_answer,
                "task": confirmed_task,
                "history": previous_step,
                "bbox_ref": step['annot_position'],
                "step": step,
                "is_last": i == len(sample) - 1,
                "is_first": not previous_actions,
            })
            previous_actions.append(cur_answer)
            step_i += 1

save_url = "Your save path"
with jsonlines.open(save_url, mode="w") as writer:
    writer.write_all(total_step)